1. Source Cross Validate Functions

# Load the cross-validation helper script. The path is relative, so the
# working directory must be the folder that contains "Mean Reversion".
source(file = "./Mean Reversion/RMR.010 Cross Validate Functions.R")

2. Load Data

# Read the raw pricing CSV with an explicit compact column specification
# (one type code per column: i = integer, T = date-time, d = double,
# c = character). read_csv() is not attached in this file; presumably the
# sourced helper script loads it -- confirm.
pricing_data <- read_csv(
  file = "./Mean Reversion/Raw Data/pricing data.csv",
  col_types = "iTdddddddci"
)

3. Set Parameters

Arguments
pricing_data: A dataframe containing pricing data from Poloniex gathered in tidy format.
time_resolution: The number of seconds that each observation spans. Takes values 300, 900, 1800, 7200, 14400, and 86400.
cutoff_date: A date representing the cutoff date between the train and test sets.
train_window: A period object from lubridate representing the length of time the train set covers.
test_window: A period object from lubridate representing the length of time the test set covers.
quote_currency: A string indicating the quote currency of the currency pairs. Can take values USDT or BTC.
adf_threshold: The threshold for the ADF test statistic. Pairs below this threshold are selected.
rolling_window: The number of observations used in each iteration of a rolling linear regression.
stop_threshold: A threshold for the spread z-score beyond which the strategy stops trading the coin pair.
signal_logic: A string indicating which logic to use to generate signals.
model_type: A string indicating whether raw prices or log prices should be used. Takes value “raw” or “log”.
number_pairs: The number of pairs to generate plots for.

# Strategy configuration shared by every call below. The assignments are
# independent of one another and are grouped here by purpose.

# Sampling resolution (seconds per observation) and train/test window lengths.
time_resolution <- 300
train_window <- days(16)
test_window <- days(10)

# Pair selection: quote currency, ADF cutoff, and number of pairs to plot.
quote_currency <- "USDT"
adf_threshold <- -2.96
number_pairs <- 8

# Spread model and signal generation.
model_type <- "raw"
signal_logic <- "scaled"
stop_threshold <- 3.51

# Observations per rolling regression. 4608 is 16 days of 300-second
# observations (86400 / 300 * 16). The disabled line below is an alternative
# that derives the window from time_resolution instead of hard-coding it.
# rolling_window <- 86400 / time_resolution * as.numeric(days(2)) / 86400
rolling_window <- 4608

4. Cross Validation September 2017

# September 2017 fold: run plot_many() with the train/test cutoff at
# 2017-09-01 and the shared strategy parameters. plot_many() is not defined in
# this file; presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-09-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 14 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1 USDT_DASH  USDT_ZEC -5.380957
##  2  USDT_ZEC USDT_DASH -5.304666
##  3  USDT_ZEC  USDT_XMR -3.961555
##  4  USDT_XMR  USDT_ZEC -3.840609
##  5  USDT_REP  USDT_ZEC -3.768945
##  6 USDT_DASH  USDT_XMR -3.713431
##  7  USDT_REP  USDT_XMR -3.666970
##  8  USDT_ZEC  USDT_REP -3.597644
##  9  USDT_REP USDT_DASH -3.591031
## 10 USDT_DASH  USDT_REP -3.533729
## 11  USDT_XMR USDT_DASH -3.471097
## 12  USDT_ETH  USDT_LTC -3.415202
## 13  USDT_XMR  USDT_REP -3.358202
## 14  USDT_LTC  USDT_ETH -3.158724

5. Cross Validation August 2017

# August 2017 fold: run plot_many() with the train/test cutoff at 2017-08-01
# and the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-08-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 15 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_REP  USDT_ZEC -6.427602
##  2  USDT_ZEC  USDT_REP -6.361551
##  3  USDT_LTC  USDT_REP -6.019389
##  4  USDT_REP  USDT_LTC -5.887493
##  5  USDT_ETH  USDT_ZEC -5.275862
##  6  USDT_ZEC  USDT_ETH -5.239483
##  7  USDT_LTC  USDT_ZEC -4.881401
##  8  USDT_ZEC  USDT_LTC -4.615123
##  9  USDT_REP  USDT_ETH -4.400386
## 10  USDT_ETH  USDT_REP -4.343464
## 11  USDT_XMR USDT_DASH -3.633904
## 12 USDT_DASH  USDT_XMR -3.622352
## 13  USDT_LTC  USDT_ETH -3.587846
## 14  USDT_ETH  USDT_LTC -3.279832
## 15  USDT_REP USDT_DASH -3.074027

6. Cross Validation July 2017

# July 2017 fold: run plot_many() with the train/test cutoff at 2017-07-01 and
# the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-07-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 30 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_REP  USDT_XMR -7.754601
##  2  USDT_XMR  USDT_REP -7.547141
##  3  USDT_REP  USDT_ZEC -5.773214
##  4  USDT_ZEC  USDT_REP -5.378645
##  5  USDT_REP  USDT_ETH -5.200190
##  6 USDT_DASH  USDT_LTC -4.827813
##  7  USDT_ETH  USDT_REP -4.778907
##  8  USDT_LTC USDT_DASH -4.602047
##  9  USDT_XMR  USDT_ZEC -4.596609
## 10  USDT_BTC  USDT_REP -4.537611
## # ... with 20 more rows

7. Cross Validation June 2017

# June 2017 fold: run plot_many() with the train/test cutoff at 2017-06-01 and
# the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-06-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 25 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_REP  USDT_XMR -6.864618
##  2  USDT_XMR  USDT_REP -6.776243
##  3  USDT_REP USDT_DASH -6.366745
##  4 USDT_DASH  USDT_REP -6.201406
##  5  USDT_XMR USDT_DASH -6.157884
##  6 USDT_DASH  USDT_XMR -6.083495
##  7  USDT_REP  USDT_ZEC -5.143941
##  8 USDT_DASH  USDT_ZEC -4.905823
##  9  USDT_ZEC  USDT_REP -4.898359
## 10  USDT_ZEC USDT_DASH -4.867936
## # ... with 15 more rows

8. Cross Validation May 2017

# May 2017 fold: run plot_many() with the train/test cutoff at 2017-05-01 and
# the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-05-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 19 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_XMR  USDT_ZEC -5.470549
##  2 USDT_DASH  USDT_ZEC -5.451411
##  3  USDT_XMR USDT_DASH -5.406064
##  4  USDT_ZEC USDT_DASH -5.365351
##  5 USDT_DASH  USDT_XMR -5.359242
##  6  USDT_ZEC  USDT_XMR -5.339887
##  7  USDT_XMR  USDT_ETH -4.744628
##  8  USDT_ETH  USDT_XMR -4.330518
##  9  USDT_ZEC  USDT_ETH -4.093348
## 10  USDT_REP  USDT_ZEC -3.932324
## 11  USDT_ETH  USDT_ZEC -3.800008
## 12  USDT_ZEC  USDT_REP -3.707490
## 13  USDT_REP USDT_DASH -3.707416
## 14  USDT_REP  USDT_ETH -3.678356
## 15  USDT_REP  USDT_XMR -3.618069
## 16 USDT_DASH  USDT_REP -3.599772
## 17  USDT_XMR  USDT_REP -3.575737
## 18 USDT_DASH  USDT_ETH -3.262312
## 19  USDT_ETH  USDT_REP -3.086158

9. Cross Validation April 2017

# April 2017 fold: run plot_many() with the train/test cutoff at 2017-04-01
# and the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-04-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 31 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_REP USDT_DASH -5.521194
##  2 USDT_DASH  USDT_REP -5.259461
##  3  USDT_XMR  USDT_ETH -5.000186
##  4  USDT_XMR USDT_DASH -4.763988
##  5  USDT_REP  USDT_LTC -4.739366
##  6  USDT_XMR  USDT_BTC -4.179513
##  7  USDT_XMR  USDT_REP -4.145031
##  8  USDT_XMR  USDT_LTC -4.096683
##  9  USDT_ETH  USDT_XMR -4.092585
## 10  USDT_ETH USDT_DASH -4.072278
## # ... with 21 more rows

10. Cross Validation March 2017

# March 2017 fold: run plot_many() with the train/test cutoff at 2017-03-01
# and the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-03-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 11 x 3
##      coin_y    coin_x  adf_stat
##       <chr>     <chr>     <dbl>
##  1 USDT_LTC  USDT_BTC -5.956905
##  2 USDT_LTC USDT_DASH -5.402013
##  3 USDT_LTC  USDT_ZEC -5.369503
##  4 USDT_LTC  USDT_REP -5.335098
##  5 USDT_LTC  USDT_XMR -5.232308
##  6 USDT_LTC  USDT_ETH -5.213867
##  7 USDT_XMR  USDT_BTC -3.461436
##  8 USDT_REP  USDT_XMR -3.190210
##  9 USDT_XMR  USDT_REP -3.171302
## 10 USDT_XMR USDT_DASH -3.151879
## 11 USDT_BTC  USDT_LTC -3.082816

11. Cross Validation February 2017

# February 2017 fold: run plot_many() with the train/test cutoff at 2017-02-01
# and the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-02-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 28 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1 USDT_DASH  USDT_XMR -4.798816
##  2  USDT_XMR USDT_DASH -4.766860
##  3  USDT_ETH  USDT_BTC -4.739089
##  4  USDT_XMR  USDT_BTC -4.501784
##  5  USDT_BTC  USDT_ETH -4.425492
##  6  USDT_BTC  USDT_XMR -4.420176
##  7  USDT_REP  USDT_ETH -4.250730
##  8 USDT_DASH  USDT_BTC -4.147420
##  9  USDT_ETH  USDT_REP -4.040995
## 10  USDT_BTC USDT_DASH -4.031222
## # ... with 18 more rows

12. Cross Validation January 2017

# January 2017 fold: run plot_many() with the train/test cutoff at 2017-01-01
# and the shared strategy parameters. plot_many() is not defined in this file;
# presumably it comes from the sourced helper script.
plot_many(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  cutoff_date = "2017-01-01",
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type,
  number_pairs = number_pairs
)
## # A tibble: 13 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_ZEC  USDT_BTC -4.288920
##  2 USDT_DASH  USDT_REP -4.266037
##  3  USDT_REP USDT_DASH -3.978244
##  4  USDT_BTC  USDT_ZEC -3.805267
##  5  USDT_ZEC  USDT_LTC -3.791873
##  6  USDT_ETH USDT_DASH -3.784793
##  7  USDT_REP  USDT_XMR -3.723175
##  8 USDT_DASH  USDT_ETH -3.445868
##  9  USDT_LTC  USDT_ZEC -3.424585
## 10  USDT_XMR  USDT_REP -3.417472
## 11 USDT_DASH  USDT_XMR -3.149624
## 12  USDT_ZEC  USDT_XMR -3.120132
## 13  USDT_ETH  USDT_REP -3.114262

13. Cross Validation Full

# Run the full backtest with the shared strategy parameters and keep the
# result for the plot and summary below. No cutoff_date is supplied here.
# backtest_strategy_full() is not defined in this file; presumably it comes
# from the sourced helper script.
results <- backtest_strategy_full(
  pricing_data = pricing_data,
  time_resolution = time_resolution,
  train_window = train_window,
  test_window = test_window,
  quote_currency = quote_currency,
  adf_threshold = adf_threshold,
  rolling_window = rolling_window,
  stop_threshold = stop_threshold,
  signal_logic = signal_logic,
  model_type = model_type
)
## [1] "Cross validating strategy."
## [1] "Using train set from 2016-12-16 to 2017-01-01."
## [1] "Using test set from 2017-01-01 to 2017-01-11."
## [1] "Cross validating strategy."
## [1] "Using train set from 2016-12-26 to 2017-01-11."
## [1] "Using test set from 2017-01-11 to 2017-01-21."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-01-05 to 2017-01-21."
## [1] "Using test set from 2017-01-21 to 2017-01-31."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-01-15 to 2017-01-31."
## [1] "Using test set from 2017-01-31 to 2017-02-10."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-01-25 to 2017-02-10."
## [1] "Using test set from 2017-02-10 to 2017-02-20."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-02-04 to 2017-02-20."
## [1] "Using test set from 2017-02-20 to 2017-03-02."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-02-14 to 2017-03-02."
## [1] "Using test set from 2017-03-02 to 2017-03-12."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-02-24 to 2017-03-12."
## [1] "Using test set from 2017-03-12 to 2017-03-22."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-03-06 to 2017-03-22."
## [1] "Using test set from 2017-03-22 to 2017-04-01."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-03-16 to 2017-04-01."
## [1] "Using test set from 2017-04-01 to 2017-04-11."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-03-26 to 2017-04-11."
## [1] "Using test set from 2017-04-11 to 2017-04-21."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-04-05 to 2017-04-21."
## [1] "Using test set from 2017-04-21 to 2017-05-01."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-04-15 to 2017-05-01."
## [1] "Using test set from 2017-05-01 to 2017-05-11."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-04-25 to 2017-05-11."
## [1] "Using test set from 2017-05-11 to 2017-05-21."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-05-05 to 2017-05-21."
## [1] "Using test set from 2017-05-21 to 2017-05-31."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-05-15 to 2017-05-31."
## [1] "Using test set from 2017-05-31 to 2017-06-10."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-05-25 to 2017-06-10."
## [1] "Using test set from 2017-06-10 to 2017-06-20."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-06-04 to 2017-06-20."
## [1] "Using test set from 2017-06-20 to 2017-06-30."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-06-14 to 2017-06-30."
## [1] "Using test set from 2017-06-30 to 2017-07-10."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-06-24 to 2017-07-10."
## [1] "Using test set from 2017-07-10 to 2017-07-20."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-07-04 to 2017-07-20."
## [1] "Using test set from 2017-07-20 to 2017-07-30."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-07-14 to 2017-07-30."
## [1] "Using test set from 2017-07-30 to 2017-08-09."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-07-24 to 2017-08-09."
## [1] "Using test set from 2017-08-09 to 2017-08-19."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-08-03 to 2017-08-19."
## [1] "Using test set from 2017-08-19 to 2017-08-29."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-08-13 to 2017-08-29."
## [1] "Using test set from 2017-08-29 to 2017-09-08."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-08-23 to 2017-09-08."
## [1] "Using test set from 2017-09-08 to 2017-09-18."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-09-02 to 2017-09-18."
## [1] "Using test set from 2017-09-18 to 2017-09-28."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-09-12 to 2017-09-28."
## [1] "Using test set from 2017-09-28 to 2017-10-08."
# Plot the strategy's cumulative return over the backtest period.
# The black horizontal line at 1 is a fixed reference level (presumably the
# break-even level for a cumulative return that starts at 1 -- confirm against
# backtest_strategy_full()).
# Only the strategy series is drawn; there is no buy-and-hold series, so the
# title describes the strategy return alone rather than a comparison.
ggplot(results, aes(x = date_time)) +
  geom_line(aes(y = return_strategy_cumulative), colour = "blue", size = 1) +
  geom_hline(yintercept = 1, colour = "black") +
  labs(
    title = "Strategy Cumulative Return",
    x = "Date",
    y = "Cumulative Return"
  )

# Print the last value of the cumulative strategy return column.
print(tail(results[["return_strategy_cumulative"]], 1))
## [1] 4.683395